package job.sql.remove_duplicated;

import java.io.IOException;
import java.util.List;

import job.sql.random.Sql_Random;
import mapred.config.Compress;
import mapred.config.InputConfig;
import mapred.config.JobConfig;
import mapred.config.OutputConfig;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.KeyValueTextInputFormat;
import org.apache.hadoop.mapred.RunningJob;
import org.apache.hadoop.mapred.TextOutputFormat;

/**
 * {@link Sql_Random} job variant whose input is configured from a caller-supplied
 * list of group strings and whose output is written as text to a fixed location.
 *
 * <p>NOTE(review): judging by the class and package names this job is meant to
 * remove duplicated records; the de-duplication logic itself is not in this
 * class and presumably lives in {@code Sql_Random} — confirm.
 */
public class Sql_RemoveDuplicated_Random extends Sql_Random {

    /** Output location handed to {@link OutputConfig#newInstance}. */
    private static final String OUTPUT_PATH =
            "/user/guojw/SourcePage_NonDuplicated/CJK_WOW";

    /**
     * Path removed in {@link #tearDown}. It is relative, so it is resolved by
     * the {@link FileSystem} it is passed to.
     */
    private static final String CLEANUP_PATH = "Group";

    /**
     * Snapshot of the groups given at construction time; forwarded unchanged
     * to the input configuration. Presumably these are input paths — verify
     * against {@code InputConfig.newInstance}.
     */
    private final String[] groups;

    /**
     * Creates the job definition.
     *
     * @param groups the group strings to hand to the input configuration; the
     *               list is copied, so later changes to it are not observed
     * @throws IOException          if the superclass constructor fails
     * @throws NullPointerException if {@code groups} is {@code null}
     */
    public Sql_RemoveDuplicated_Random(List<String> groups) throws IOException {
        super();
        if (groups == null) {
            // Same exception type the original raised implicitly, but with a
            // message that names the offending argument.
            throw new NullPointerException("groups must not be null");
        }
        this.groups = groups.toArray(new String[0]);
    }

    /**
     * Builds the input configuration from {@link KeyValueTextInputFormat} and
     * the groups supplied to the constructor.
     *
     * @param fs file system supplied by the framework; not used here
     * @return the input configuration for this job
     * @throws IOException declared by the overridden method
     */
    @Override
    public InputConfig getInputConfig(FileSystem fs) throws IOException {
        return InputConfig.newInstance(KeyValueTextInputFormat.class, groups);
    }

    /**
     * Builds the output configuration: {@link TextOutputFormat} with
     * {@link Text} passed for both class arguments, the fixed
     * {@link #OUTPUT_PATH}, and {@link Compress#GZIP}.
     *
     * @param fs file system supplied by the framework; not used here
     * @return the output configuration for this job
     * @throws IOException declared by the overridden method
     */
    @Override
    protected OutputConfig getOutputConfig(FileSystem fs) throws IOException {
        return OutputConfig.newInstance(TextOutputFormat.class, Text.class,
                Text.class, OUTPUT_PATH, Compress.GZIP);
    }

    /**
     * Recursively deletes {@link #CLEANUP_PATH} on the given file system.
     *
     * <p>The boolean result of the delete is deliberately not checked, so a
     * missing path is not treated as an error here.
     *
     * @param conf job configuration; not used here
     * @param fs   file system on which the path is deleted
     * @param job  the job handle; not used here
     * @throws IOException if the delete operation fails
     */
    @Override
    protected void tearDown(JobConfig conf, FileSystem fs, RunningJob job)
            throws IOException {
        fs.delete(new Path(CLEANUP_PATH), true);
    }

}
